* 3: MGNREGS outcomes
* Figure 3
* Table 6, A8, A9, A12, A13, A14, A15
gl data ".../Replication Files/data"



* Create Dataset with MGNREGS Outcomes & GP Population
{
* 1 - Import & Clean MGNREGS Data
{
* Import
* Reads the four yearly NREGA workbooks (2012-13 through 2015-16), tags each row with
* the starting year, and stacks them into one file on disk.
forvalues yy = 12/15 {
	local first  = 2000 + `yy'
	local second = `yy' + 1

	import excel "$data/NREGA `first'-`second'.xls", firstrow clear
	gen year = `first'

	* The 2012 workbook starts the stacked file; every later year is appended to
	* whatever has been saved so far.
	if `yy' > 12 {
		append using "$data/nrega_gujarat.dta"
	}
	save "$data/nrega_gujarat.dta", replace
}
*


* Clean
* Harmonises the district, block and panchayat name fields of the stacked NREGA data.
{
rename (DistrictName PanchayatName) (District Panchayat)

drop if District == "0"

* Remove every blank and convert to upper case.
foreach namevar of varlist District BlockName Panchayat {
	replace `namevar' = upper(subinstr(`namevar', " ", "", .))
}

* District Name Changes
* Each tab shows which blocks already carry the new district name; the replace that
* follows assigns the new district to the listed blocks.
* NOTE(review): the recodes match on BlockName alone and do not check the current
* District, so a same-named block in another district would also be moved - confirm
* that the listed block names are unique in the data.
{
* Aravalli was split from Sabarkantha.
tab BlockName if District == "ARVALLI"
replace District = "ARVALLI" if inlist(BlockName, "BAYAD", "BHILODA", "DHANSURA", "MALPUR", "MEGHRAJ", "MODASA")

* Botad was created from parts of Ahmedabad and Bhavnagar districts.
tab BlockName if District == "BOTAD"
replace District = "BOTAD" if inlist(BlockName, "BARWALA", "BOTAD", "GADHADA", "RANPUR")

* Chhota Udaipur was split from Vadodara district.
tab BlockName if District == "CHHOTAUDEPUR"
replace District = "CHHOTAUDEPUR" if inlist(BlockName, "BODELI", "CHHOTAUDAIPUR", "JETPURPAVI", "KAWANT", "NASVADI", "SANKHEDA")

* Devbhoomi Dwarka was split from Jamnagar.
tab BlockName if District == "DEVBHUMIDWARKA"
replace District = "DEVBHUMIDWARKA" if inlist(BlockName, "BHANVAD", "KALYANPUR", "KHAMBHALIA", "OKHAMANDAL")

* Mahisagar was created from parts of Kheda and Panchmahal.
tab BlockName if District == "MAHISAGAR"
replace District = "MAHISAGAR" if inlist(BlockName, "BALASINOR", "KADANA", "KHANPUR", "LUNAWADA", "SANTRAMPUR", "VIRPUR")

* Morbi was created from parts of Rajkot, Surendranagar and Jamnagar districts.
tab BlockName if District == "MORBI"
replace District = "MORBI" if inlist(BlockName, "HALVAD", "MALIYA", "MORBI", "TANKARA", "WANKANER")

* Gir Somnath was split from Junagadh.
tab BlockName if District == "GIRSOMNATH"
replace District = "GIRSOMNATH" if inlist(BlockName, "KODINAR", "PATANVERAVAL", "SUTRAPADA", "TALALA", "UNA")
}
}
*

* Numeric panchayat id: one integer per distinct combination of the cleaned
* District, BlockName and Panchayat strings. Later sections use it as the merge key.
egen gpid_nr = group(District BlockName Panchayat)
}
*
* Overwrite the import-stage file with the cleaned panel, which now carries gpid_nr.
save "$data/nrega_gujarat.dta", replace



* 2 - GP-level dataset (for merging)
* One row per NREGA panchayat id with its three name fields, renamed to the
* DistrictName / BlockName / PanchayatName keys used for the name match below.
use "$data/nrega_gujarat.dta", clear
{
keep gpid_nr* District BlockName Panchayat

* First row per id is retained; rows without an id are discarded.
duplicates drop gpid_nr, force
keep if gpid_nr != .

rename (District Panchayat) (DistrictName PanchayatName)
}
save "$data/gpid_nr.dta", replace



* 3 - Merge with GP-level population
* Builds a crosswalk between census GPs (gp_id) and NREGA panchayats (gpid_nr) by
* fuzzy-matching on district, block and panchayat names, then prunes it so that each
* gp_id appears once and each non-missing gpid_nr appears once.
use "$data/vill_lgd_census11_01.dta", clear
{
* Sum SC_P and ST_P within gp_id and express them as shares of gp_pop.
bysort gp_id: egen sc = sum(SC_P)
bysort gp_id: egen st = sum(ST_P)
gen prop_sc = sc/gp_pop
gen prop_st = st/gp_pop

* Collapse to one row per gp_id, then discard GPs whose district-block-panchayat
* name combination is shared with another GP (they cannot be matched by name).
duplicates drop gp_id, force
duplicates tag DistrictName BlockName PanchayatName, gen(tag)
drop if tag>0
drop tag

* Fuzzy name match against the NREGA panchayat list; score1 holds the match score.
reclink DistrictName BlockName PanchayatName using "$data/gpid_nr.dta", gen(score1) idm(gp_id) idu(gpid_nr)

* keep best matches for each gp_id
* Unmatched rows get score 0. Within each gpid_nr only the highest-scoring census row
* keeps the link; the others lose their gpid_nr and have their score reset to 0.
replace score1=0 if score1==.
bysort gpid_nr: egen max_merge = max(score1)
replace gpid_nr=. if max_merge!=score1
replace score1=0  if max_merge!=score1
drop max_merge

* Within each gp_id keep only the row(s) with the highest remaining score.
bysort gp_id: egen max_merge = max(score1)
drop if max_merge!=score1

* drop remaining duplicates based on name
* The tied candidates are listed in the log (previously an interactive browse, which
* needs the Data Editor and leaves no record in a batch run).
* NOTE(review): the id pairs below are hard-coded; gpid_nr comes from egen group() on
* the NREGA names, so these pairs are only valid for the exact input files used when
* they were chosen.
duplicates tag gp_id, gen(tag)
list gp_id gpid_nr if tag>0 & gpid_nr!=., sepby(gp_id)
drop if gp_id==1682 & gpid_nr==1750
drop if gp_id==3255 & gpid_nr==3640
drop if gp_id==3256 & gpid_nr==3639
drop if gp_id==3597 & gpid_nr==3997
drop if gp_id==3598 & gpid_nr==3996
drop if gp_id==3794 & gpid_nr==4155
drop if gp_id==3795 & gpid_nr==4154

drop if gp_id==3821 & gpid_nr==4175
drop if gp_id==4267 & gpid_nr==4703
drop if gp_id==4906 & gpid_nr==5445
drop if gp_id==5287 & gpid_nr==5843
drop if gp_id==5288 & gpid_nr==5842
drop if gp_id==5925 & gpid_nr==6593
drop if gp_id==6940 & gpid_nr==7455
drop if gp_id==7817 & gpid_nr==8377
drop if gp_id==8856 & gpid_nr==9529
drop if gp_id==13559 & gpid_nr==14693

drop tag

* Any gp_id that is still duplicated is treated as unmatched.
duplicates tag gp_id, gen(tag)
replace gpid_nr=. if tag>0
drop tag

* The rows just blanked are now exact duplicates on both ids; keep one of each.
duplicates drop gp_id gpid_nr, force

* Same procedure in the other direction: NREGA ids claimed by more than one gp_id.
duplicates tag gpid_nr, gen(tag)
sort gpid_nr gp_id
list gp_id gpid_nr if tag>0 & gpid_nr!=., sepby(gpid_nr)
drop if gp_id==986 & gpid_nr==996
drop if gp_id==2088 & gpid_nr==2229
drop if gp_id==7132 & gpid_nr==7636
drop if gp_id==8266 & gpid_nr==8871
drop if gp_id==9741 & gpid_nr==10536

drop tag

* Any gpid_nr that is still duplicated is treated as unmatched.
duplicates tag gpid_nr, gen(tag)
replace gpid_nr=. if tag>0
drop tag

* Report the number of distinct ids left on each side.
distinct gp_id gpid_nr 
}
*

}
* Save the census GP file with its NREGA id attached (gpid_nr is missing where no
* unique match was kept). Every analysis section below starts from this file.
save "$data/vill_lgd_census11_01_nrega.dta", replace



* Table 6, Figure 3, Table A.8, Table A.9
{
* Analysis sample: matched census GPs joined to their yearly NREGA rows.
use "$data/vill_lgd_census11_01_nrega.dta", clear
keep if gpid_nr != .
merge 1:m gpid_nr using "$data/nrega_gujarat.dta", generate(merge0) keepusing(year *persondaysworked* *householdsworked*)

* Indicators for 2012, 2013 and 2014. With the 2012-2015 import years, 2015 has no
* indicator of its own.
forvalues k = 1/3 {
	gen year`k' = (year == 2011 + `k')
}

* Drop the Total prefix from the six outcome columns.
foreach stub in persondaysworkednonSC persondaysworkedbySCs persondaysworkedbySTs householdsworkednonSC SChouseholdsworked SThouseholdsworked  {
	rename Total`stub' `stub'
}

* Combined SC + ST outcomes (missing if either component is missing).
gen persondaysworkedSCST = persondaysworkedbySCs + persondaysworkedbySTs
gen householdsworkedSCST = SChouseholdsworked + SThouseholdsworked



* Table 6
* RD estimates at GP-population cutoffs. Every specification prints the rdrobust
* bias-corrected estimate with the robust p-value and CI, followed by the outcome
* mean and N inside the population window.
{
* Panel 1: Cutoff = 5001
foreach out in persondaysworkedSCST persondaysworkednonSC householdsworkedSCST householdsworkednonSC {

	local b  = 5001
	local lo = `b' - 1000
	local hi = `b' + 999

	display "`b' `out'"

	* Set values above the 99th percentile to missing. The outcome variable itself is
	* overwritten, so everything run afterwards on this dataset uses the trimmed series.
	quietly summarize `out', detail
	quietly replace `out' = . if `out' > r(p99)

	quietly rdrobust `out' gp_pop if inrange(gp_pop,`lo',`hi'), c(`b') all bwselect(msetwo) vce(cluster gp_pop) covs(gp_scst prop_scst year1 year2 year3)
	display "b" round(e(tau_bc),.001) "  p-val: " round(e(pv_rb),.001) "  ci:["round(e(ci_l_rb),.01) " "  round(e(ci_r_rb),.01) "]   "

	quietly summarize `out' if inrange(gp_pop,`lo',`hi')
	display "mean: " round(r(mean),.001)

	display "obs:" r(N)
	display ""

}
*



* Panel 2: Cutoffs = 4001, 6001
* 4001 estimate scaled by 9/11; 6001 estimate scaled by 13/11
{
* Cutoff assigned to each row: 4001 below a population of 5001, 6001 from 5001 upward.
gen c = cond(gp_pop >= 5001, 6001, 4001)

* Option strings for rdmc, stored in the first two rows (two cutoffs).
* NOTE(review): assumes rdmc reads one row per cutoff from these variables - confirm
* against the installed rdmc version.
gen bwselect = "msetwo" in 1/2
gen vce      = "cluster gp_pop" in 1/2
gen covs     = "gp_scst prop_scst year1 year2 year3" in 1/2

foreach out in persondaysworkedSCST persondaysworkednonSC householdsworkedSCST householdsworkednonSC {

	quietly rdmc `out' gp_pop if inrange(gp_pop,3001,7000), c(c) bwselect(bwselect) vce(vce) covs(covs)
	* Average of the two rescaled cutoff-specific estimates.
	lincom (c1*9/11 + c2*13/11)/2

	quietly summarize `out' if inrange(gp_pop,3001,7000)
	display "mean: " round(r(mean),.001)

	display "obs:" r(N)
	display ""
}
}
*



* Panel 3: Cutoff = 8001
* Estimate scaled by 17/11
foreach out in persondaysworkedSCST persondaysworkednonSC householdsworkedSCST householdsworkednonSC {
	local b  = 8001
	local lo = `b' - 1000
	local hi = `b' + 999

	display "`b' `out'"

	* The point estimate and CI bounds are rescaled when printed; the p-value is not.
	quietly rdrobust `out' gp_pop if inrange(gp_pop,`lo',`hi'), c(`b') all bwselect(msetwo) vce(cluster gp_pop) covs(gp_scst prop_scst year1 year2 year3)
	display "b" round(e(tau_bc)*17/11,.001) "  p-val: " round(e(pv_rb),.001) "  ci:["round(e(ci_l_rb)*17/11,.01) " "  round(e(ci_r_rb)*17/11,.01) "]   "

	quietly summarize `out' if inrange(gp_pop,`lo',`hi')
	display "mean: " round(r(mean),.001)

	display "obs:" r(N)
	display ""
}
*


}
*



* Figure 3
* NOTE(review): this loop was previously headed Figure 4; the file header and the
* section header both list Figure 3 - confirm against the paper.
* Binned RD plots of person-days around the 5001 cutoff (population 4850 to 5150).
foreach out in persondaysworkedSCST persondaysworkednonSC {

	rdplot `out' gp_pop if inrange(gp_pop,4850,5150), c(5001) nbins(3) p(2) ci(95) ///
		graph_options(ylabel(0(1000)2000, labsize(vlarge)) graphregion(color(white)) ///
		ytitle(Person-Days, size(16pt)) xtitle(GP Population, size(16pt)) title("") ///
		legend(off) ysc(r(0 2500)) xsc(titlegap(2)) xlabel(4850 5001 5150, labsize(vlarge)))

}
*



* Table A.8 Normalized by GP Population
* NOTE(review): this section was previously headed Table A.7; the file header and the
* section header list A.8 for this output - confirm against the paper.
* Same three panels as Table 6, with each outcome divided by gp_pop. Panel 2 needs the
* variables c, bwselect, vce and covs created in Table 6 Panel 2 to be in memory.
{
* Panel 1: Cutoff = 5001
foreach out in persondaysworkedSCST persondaysworkednonSC householdsworkedSCST householdsworkednonSC {
	gen `out'_n = `out'/gp_pop

	local b = 5001
	display "`b'"

	local sample  if inrange(gp_pop,`b'-1000,`b'+999)
	local rd_opts c(`b') all bwselect(msetwo) vce(cluster gp_pop) covs(gp_scst prop_scst year1 year2 year3)

	quietly rdrobust `out'_n gp_pop `sample', `rd_opts'
	display "b" round(e(tau_bc),.001) "  p-val: " round(e(pv_rb),.001) "  ci:["round(e(ci_l_rb),.01) " "  round(e(ci_r_rb),.01) "]   "

	quietly summarize `out'_n `sample'
	display "mean: " round(r(mean),.001)

	display "obs:" r(N)
	display ""
}
*



* Panel 2: Cutoffs = 4001, 6001
* 4001 estimate scaled by 9/11; 6001 estimate scaled by 13/11
foreach out in persondaysworkedSCST persondaysworkednonSC householdsworkedSCST householdsworkednonSC {

	local sample if inrange(gp_pop,3001,7000)

	quietly rdmc `out'_n gp_pop `sample', c(c) bwselect(bwselect) vce(vce) covs(covs)
	lincom (c1*9/11 + c2*13/11)/2

	quietly summarize `out'_n `sample'
	display "mean: " round(r(mean),.001)

	display "obs:" r(N)
	display ""
}
*



* Panel 3: Cutoff = 8001
* Estimate scaled by 17/11
foreach out in persondaysworkedSCST persondaysworkednonSC householdsworkedSCST householdsworkednonSC {

	local b = 8001
	display "`b'"

	local sample  if inrange(gp_pop,`b'-1000,`b'+999)
	local rd_opts c(`b') all bwselect(msetwo) vce(cluster gp_pop) covs(gp_scst prop_scst year1 year2 year3)

	quietly rdrobust `out'_n gp_pop `sample', `rd_opts'
	display "b" round(e(tau_bc)*17/11,.001) "  p-val: " round(e(pv_rb),.001) "  ci:["round(e(ci_l_rb)*17/11,.01) " "  round(e(ci_r_rb)*17/11,.01) "]   "

	quietly summarize `out'_n `sample'
	display "mean: " round(r(mean),.001)

	display "obs:" r(N)
	display ""
}
*

}
*



* Table A.9 Log Transformations
* NOTE(review): this section was previously headed Table A.8; the file header and the
* section header list A.9 for this output - confirm against the paper.
* Panel 1 uses SC/ST person-days and Panel 2 uses SC/ST households. For each, four
* log-type transformations are built and the 5001-cutoff RD is run on every one.
{
foreach stub in persondays hhs {

	* Source outcome for this panel
	if "`stub'" == "persondays" local src persondaysworkedSCST
	else local src householdsworkedSCST

	* Generate Transformations
	* 1: plain log, so zeros become missing
	gen ln_`stub'_scst_1 = ln(`src')

	* 2: zeros replaced by the mean of the outcome over all rows in memory, then logged
	gen ln_`stub'_scst_2 = `src'
	quietly summarize `src'
	replace ln_`stub'_scst_2 = r(mean) if ln_`stub'_scst_2 == 0
	replace ln_`stub'_scst_2 = ln(ln_`stub'_scst_2)

	* 3: log of the outcome plus one
	gen ln_`stub'_scst_3 = ln(`src' + 1)

	* 4: ln(x + (x^2 + 1)^.5), the inverse hyperbolic sine written out
	gen ln_`stub'_scst_4 = ln(`src' + (`src'^2 + 1)^.5)

	* Regressions
	forvalues n = 1/4 {
		local b = 5001

		quietly rdrobust ln_`stub'_scst_`n' gp_pop if inrange(gp_pop,`b'-1000,`b'+999), c(`b') all bwselect(msetwo) vce(cluster gp_pop) covs(gp_scst prop_scst year1 year2 year3)
		display "b" round(e(tau_bc),.001) "  p-val: " round(e(pv_rb),.001) "  ci:["round(e(ci_l_rb),.01) " "  round(e(ci_r_rb),.01) "]   "

		quietly summarize ln_`stub'_scst_`n' if inrange(gp_pop,`b'-1000,`b'+999)
		display "mean: " round(r(mean),.001)

		display "obs:" r(N)
		display ""

		display " "
	}
}
}
* 

}
*



* Table A.12 Robustness Checks
* Re-estimates the 5001-cutoff RD for the two SC/ST outcomes with alternative kernels,
* higher polynomial orders, no covariates, and the untrimmed outcome.
{
use "$data/vill_lgd_census11_01_nrega.dta", clear
keep if gpid_nr != .
merge 1:m gpid_nr using "$data/nrega_gujarat.dta", generate(merge0) keepusing(year *persondaysworked* *householdsworked*)

* Indicators for 2012, 2013 and 2014
forvalues k = 1/3 {
	gen year`k' = (year == 2011 + `k')
}
* Drop the Total prefix from the six outcome columns
foreach stub in persondaysworkednonSC persondaysworkedbySCs persondaysworkedbySTs householdsworkednonSC SChouseholdsworked SThouseholdsworked  {
	rename Total`stub' `stub'
}

gen persondaysworkedSCST = persondaysworkedbySCs + persondaysworkedbySTs
gen householdsworkedSCST = SChouseholdsworked + SThouseholdsworked



foreach out in persondaysworkedSCST householdsworkedSCST {
	display "`out'"
	local b = 5001
	display "`b'"

	* Rescale to hundreds, keep an untrimmed copy (suffix 2) for the including-outliers
	* specification, then set values above the 99th percentile of the main variable to missing.
	replace `out' = `out'/100
	gen `out'2 = `out'
	quietly summarize `out', detail
	quietly replace `out' = . if `out' > r(p99)
	* NOTE(review): this local is not referenced anywhere else in the visible code.
	local one = 0

	* Command text shared by every column
	local sample   if inrange(gp_pop,`b'-1000,`b'+999)
	local base     c(`b') all bwselect(msetwo) vce(cluster gp_pop)
	local controls covs(gp_scst prop_scst year1 year2 year3)

	* Columns 1-2: Alternative Kernels
	foreach kernel in uni epa {
		quietly rdrobust `out' gp_pop `sample', `base' `controls' kernel(`kernel')
		display "b" round(e(tau_bc),.001) "  p-val: " round(e(pv_rb),.001) "  ci:["round(e(ci_l_rb),.01) " "  round(e(ci_r_rb),.01) "]   "
	}

	* Columns 3-4: Local Polynomial Degree
	forvalues p = 2/3 {
		quietly rdrobust `out' gp_pop `sample', `base' `controls' p(`p')
		display "b" round(e(tau_bc),.001) "  p-val: " round(e(pv_rb),.001) "  ci:["round(e(ci_l_rb),.01) " "  round(e(ci_r_rb),.01) "]   "
	}

	* Column 5: Excluding Controls
	quietly rdrobust `out' gp_pop `sample', `base'
	display "b" round(e(tau_bc),.001) "  p-val: " round(e(pv_rb),.001) "  ci:["round(e(ci_l_rb),.01) " "  round(e(ci_r_rb),.01) "]   "

	* Column 6: Including Outliers
	quietly rdrobust `out'2 gp_pop `sample', `base' `controls'
	display "b" round(e(tau_bc),.001) "  p-val: " round(e(pv_rb),.001) "  ci:["round(e(ci_l_rb),.01) " "  round(e(ci_r_rb),.01) "]   "

}
*

}
*



* A.13 Alternative Bandwidths
* Re-estimates the 5001-cutoff RD for the two SC/ST outcomes under each of eight
* rdrobust bandwidth selectors.
{
use "$data/vill_lgd_census11_01_nrega.dta", clear
keep if gpid_nr != .
merge 1:m gpid_nr using "$data/nrega_gujarat.dta", generate(merge0) keepusing(year *persondaysworked* *householdsworked*)

* Indicators for 2012, 2013 and 2014
forvalues k = 1/3 {
	gen year`k' = (year == 2011 + `k')
}
* Drop the Total prefix from the six outcome columns
foreach stub in persondaysworkednonSC persondaysworkedbySCs persondaysworkedbySTs householdsworkednonSC SChouseholdsworked SThouseholdsworked  {
	rename Total`stub' `stub'
}

gen persondaysworkedSCST = persondaysworkedbySCs + persondaysworkedbySTs
gen householdsworkedSCST = SChouseholdsworked + SThouseholdsworked



foreach out in persondaysworkedSCST householdsworkedSCST {
	display "`out'"
	local b = 5001
	display "`b'"

	* Set values above the 99th percentile to missing, then rescale to hundreds.
	quietly summarize `out', detail
	quietly replace `out' = . if `out' > r(p99)
	replace `out' = `out'/100

	local sample if inrange(gp_pop,`b'-1000,`b'+999)

	foreach bw in mserd msetwo msesum msecomb2 cerrd certwo cersum cercomb2 {
		display "`bw'"
		quietly rdrobust `out' gp_pop `sample', c(`b') all bwselect(`bw') vce(cluster gp_pop) covs(gp_scst prop_scst year1 year2 year3)
		display "b" round(e(tau_bc),.001) "  p-val: " round(e(pv_rb),.001) "  ci:["round(e(ci_l_rb),.01) " "  round(e(ci_r_rb),.01) "]   "

	}
	*

}
*

}
*



* A.14 2SLS
* Fuzzy RD at the 5001 cutoff, with the samras indicator passed to rdrobust through
* its fuzzy() option.
{
* Start from the ids listed in the Samras file.
use "$data/Samras_GPs.dta", clear
keep lgd_id
drop if lgd_id == .

* Merge in GP-level IDs
merge 1:1 lgd_id using "$data/vill_lgd_census11_01.dta", generate(merge) keepusing(gp_id)
drop if merge != 3
keep gp_id

* samras is 1 for crosswalk rows whose gp_id is on the Samras list and 0 otherwise.
merge 1:m gp_id using "$data/vill_lgd_census11_01_nrega.dta", generate(merge)
gen samras = (merge == 3)

keep if gpid_nr != .
merge 1:m gpid_nr using "$data/nrega_gujarat.dta", generate(merge0) keepusing(year *persondaysworked* *householdsworked*)

* Indicators for 2012, 2013 and 2014
forvalues k = 1/3 {
	gen year`k' = (year == 2011 + `k')
}
* Drop the Total prefix from the six outcome columns
foreach stub in persondaysworkednonSC persondaysworkedbySCs persondaysworkedbySTs householdsworkednonSC SChouseholdsworked SThouseholdsworked  {
	rename Total`stub' `stub'
}

gen persondaysworkedSCST = persondaysworkedbySCs + persondaysworkedbySTs
gen householdsworkedSCST = SChouseholdsworked + SThouseholdsworked



foreach out in persondaysworkedSCST householdsworkedSCST {
	display "`out'"
	local b = 5001
	display "`b'"

	* Set values above the 99th percentile to missing, then rescale to hundreds.
	quietly summarize `out', detail
	quietly replace `out' = . if `out' > r(p99)
	replace `out' = `out'/100

	quietly rdrobust `out' gp_pop if inrange(gp_pop,`b'-1000,`b'+999), c(`b') all bwselect(msetwo) vce(cluster gp_pop) covs(gp_scst prop_scst year1 year2 year3) fuzzy(samras)

	display "b" round(e(tau_bc),.001) "  p-val: " round(e(pv_rb),.001) "  ci:["round(e(ci_l_rb),.01) ", "  round(e(ci_r_rb),.01) "]   "

}
}
*



* A.15 "Diff-in-diff"
* Estimates RD jumps at six cutoffs (4001 to 9001 in steps of 1000) in one rdmc call
* and contrasts the 5001 jump with its neighbours and with the pattern around 8001.
{
use "$data/vill_lgd_census11_01_nrega.dta", clear
keep if gpid_nr != .
merge 1:m gpid_nr using "$data/nrega_gujarat.dta", generate(merge0) keepusing(year *persondaysworked* *householdsworked*)

* Indicators for 2012, 2013 and 2014
forvalues k = 1/3 {
	gen year`k' = (year == 2011 + `k')
}
* Drop the Total prefix from the six outcome columns
foreach stub in persondaysworkednonSC persondaysworkedbySCs persondaysworkedbySTs householdsworkednonSC SChouseholdsworked SThouseholdsworked  {
	rename Total`stub' `stub'
}

gen persondaysworkedSCST = persondaysworkedbySCs + persondaysworkedbySTs
gen householdsworkedSCST = SChouseholdsworked + SThouseholdsworked


* NOTE(review): b is not referenced again in the visible code of this section.
local b = 5001
* Set values above the 99th percentile to missing, then rescale to hundreds.
foreach out in persondaysworkedSCST householdsworkedSCST {
	quietly summarize `out', detail
	quietly replace `out' = . if `out' > r(p99)
	replace `out' = `out'/100
}



* Expand dataset in order to implement rdmc
* Every row is duplicated so that each GP-year can serve two cutoffs: odd-numbered
* copies take the cutoffs 4001, 6001, 8001 and even-numbered copies take 5001, 7001,
* 9001. A row is assigned a cutoff when gp_pop lies within 1000 below it (inclusive)
* and 1000 above it (exclusive); otherwise c stays missing.
{
expand 2
bysort gpid_nr year: gen copy = _n

gen c = .
foreach cut of numlist 4001 6001 8001 {
	replace c = `cut' if gp_pop >= `cut' - 1000 & gp_pop < `cut' + 1000 & inlist(copy, 1, 3)
}
foreach cut of numlist 5001 7001 9001 {
	replace c = `cut' if gp_pop >= `cut' - 1000 & gp_pop < `cut' + 1000 & inlist(copy, 2, 4)
}
}
*



* Option strings for rdmc, stored in the first six rows (six cutoffs).
* NOTE(review): assumes rdmc reads one row per cutoff from these variables - confirm
* against the installed rdmc version.
gen bwselect = "msetwo" in 1/6
gen vce      = "cluster gp_pop" in 1/6
gen covs     = "gp_scst prop_scst year1 year2 year3" in 1/6



foreach out in persondaysworkedSCST householdsworkedSCST {

	quietly rdmc `out' gp_pop, c(c) bwselect(bwselect) covs(covs) vce(vce)

	* NOTE(review): the contrasts below treat c1 to c6 as the cutoffs in ascending
	* order (4001 to 9001), which matches the scale factors used - confirm.

	* Jump at 5001
	lincom c2

	* 5001 jump net of its neighbours, minus the same contrast around 8001
	lincom c2 - (c1 + c3)/2 - (c5 - (c4 + c6)/2)

	* Same contrast with each cutoff rescaled to 5001 units
	lincom c2 - (c1*9/11 + c3*13/11)/2 - (c5*17/11 - (c4*15/11 + c6*19/11)/2)

}
*


}
*

